import os
import re
import urllib.parse
import urllib.request
# Page whose anchor links are harvested and downloaded.
url = "http://blog.csdn.net/"
# Directory the downloaded pages are written to (created if missing).
save_dir = "E:/CSDN/abc/data/"

# Masquerade as a desktop browser: install a global opener that sends a
# browser User-Agent header with every urllib request made below
# (both urlopen and urlretrieve go through the installed opener).
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
urllib.request.install_opener(opener)

# Fetch the page; the context manager closes the HTTP response promptly.
# Undecodable bytes are dropped ("ignore") rather than raising.
with urllib.request.urlopen(url) as response:
    data = response.read().decode("utf-8", "ignore")

# Each match is a (strategy, href) tuple taken from <a strategy="..." href="...">.
pat = '<a strategy="(.*?)" href="(.*?)"'
rst = re.compile(pat).findall(data)

# urlretrieve does not create missing directories, so make sure it exists.
os.makedirs(save_dir, exist_ok=True)

for i, link in enumerate(rst):
    # Resolve relative hrefs against the page URL; absolute URLs are
    # returned as they are.
    target = urllib.parse.urljoin(url, link[1])
    print(target)
    try:
        urllib.request.urlretrieve(target, os.path.join(save_dir, str(i) + ".html"))
    except (OSError, ValueError) as err:
        # URLError/HTTPError are OSError subclasses; ValueError covers
        # malformed URLs. Report and continue so that one bad link does
        # not abort the remaining downloads.
        print("failed to download " + target + ": " + str(err))

